#include "gl_wrapper.h"

/* Defined in another translation unit. In this file its three entries are used
 * as the x/y/z divisors when computing the number of work groups to dispatch.
 * NOTE(review): presumably the compute work-group size shared by all GL ops -
 * confirm against its definition. */
extern int gLocalSize[3];

/*
 * GLSL ES 3.20 compute shader source for element-wise addition of two 3D images.
 *
 * Resources:
 *   binding 0  uOutput  rgba32f image3D, writeonly
 *   binding 1  uInput0  rgba32f image3D, readonly
 *   binding 2  uInput1  rgba32f image3D, readonly
 *   location 3 imgSize  ivec4; only .xyz is read, as the image extent
 *
 * Each invocation handles the texel at gl_GlobalInvocationID. Invocations whose
 * position is not strictly inside imgSize.xyz on every axis do nothing, so the
 * dispatch may be rounded up past the image extent.
 *
 * The work-group size is fixed at 8x8x1 in the shader text.
 * NOTE(review): GLOpAdd derives its dispatch group counts from gLocalSize
 * rather than from these literals - confirm gLocalSize is {8, 8, 1}, otherwise
 * the dispatch will over- or under-cover the image.
 */
const char* glsl_op_add =
  "#version 320 es\n"
  "#define PRECISION highp\n"
  "precision PRECISION float;\n"
  "#define FORMAT rgba32f\n"
  "layout(FORMAT, binding=0) writeonly uniform PRECISION image3D uOutput;\n"
  "layout(FORMAT, binding=1) readonly uniform PRECISION image3D uInput0;\n"
  "layout(FORMAT, binding=2) readonly uniform PRECISION image3D uInput1;\n"
  "layout(location=3) uniform ivec4 imgSize;\n"
  "layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;\n"
  "void main()\n"
  "{\n"
  "    ivec3 pos = ivec3(gl_GlobalInvocationID);\n"
  "    ivec3 inSize = imgSize.xyz;\n"
  "    if(all(lessThan(pos, inSize)))\n"
  "    {\n"
  "        vec4 sum = imageLoad(uInput0, pos) + imageLoad(uInput1, pos);\n"
  "        imageStore(uOutput, pos, sum);\n"
  "    }\n"
  "}\n";

/**
 * Runs the glsl_op_add compute shader: output = input1 + input2, texel by texel.
 *
 * Builds the compute program from glsl_op_add, binds the three textures as
 * layered RGBA32F images (level 0), uploads the image extent, dispatches the
 * shader and deletes the program again.
 *
 * @param input1  texture bound read-only to image unit 1 (shader's uInput0)
 * @param input2  texture bound read-only to image unit 2 (shader's uInput1)
 * @param output  texture bound write-only to image unit 0 (shader's uOutput)
 * @param width   image extent in x, passed to the shader as imgSize.x
 * @param height  image extent in y, passed to the shader as imgSize.y
 * @param channel channel count; the z extent passed to the shader is
 *                UP_DIV(channel, 4) (presumably four channels packed per
 *                RGBA texel - confirm against the texture layout)
 *
 * No glMemoryBarrier is issued here.
 * NOTE(review): callers that read `output` afterwards presumably issue the
 * required barrier themselves - confirm.
 */
void GLOpAdd(GLuint input1, GLuint input2, GLuint output, int width, int height, int channel) {
  /* z extent of the images; used both for imgSize.z and for the dispatch. */
  int c_4 = UP_DIV(channel, 4);

  GLuint computeProgram = CreateComputeProgram(glsl_op_add);
  glUseProgram(computeProgram);

  /* Access qualifiers must agree with the shader: uOutput is `writeonly`,
   * uInput0/uInput1 are `readonly` and are read with imageLoad. Binding the
   * inputs as GL_WRITE_ONLY makes those loads return undefined values. */
  glBindImageTexture(0, output, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA32F);
  glBindImageTexture(1, input1, 0, GL_TRUE, 0, GL_READ_ONLY, GL_RGBA32F);
  glBindImageTexture(2, input2, 0, GL_TRUE, 0, GL_READ_ONLY, GL_RGBA32F);

  /* location 3 == imgSize; the shader only reads .xyz. */
  glUniform4i(3, width, height, c_4, 1);

  /* Round the group counts up; the shader bounds-checks each invocation. */
  glDispatchCompute(UP_DIV(width, gLocalSize[0]), UP_DIV(height, gLocalSize[1]), UP_DIV(c_4, gLocalSize[2]));
  glDeleteProgram(computeProgram);
}